Data input

if (!dir.exists("Spf66_calibration")) dir.create("Spf66_calibration")
if (!dir.exists("Metrics_of_interest")) dir.create("Metrics_of_interest")

RUN_ALL <- FALSE # flag whether or not to re-run fitting

# source code for model fitting
source("Model_calibration_code/clin_inf_likelihood_helper_func.R")
source("Model_calibration_code/Metropolis_Hastings_fit.R")
source("Model_calibration_code/simulate_clinical_recurrences.R")

# source code for metrics of epidemiological interest
source("Metrics_of_interest_code/metrics_under_stationary_hyp_reservoir.R")
source("Metrics_of_interest_code/classify_relapses.R")
source("Metrics_of_interest_code/relapses_per_bite.R")

# pre-processed data from SPf66 cohort for model fitting
patient_metadata <- read_rds("Spf66_data_cleaned/patient_metadata.rds")
inf_states_by_VIN <- read_rds("Spf66_data_processed/discretised_infection_matrix.rds")
SEASONALITY <- read_rds("Spf66_data_processed/seasonality_vector.rds") # inferred from Pf

N_COHORT <- nrow(patient_metadata)
keep_VIN <- as.character(patient_metadata$VIN)

# discretisation into uniform windows 
START_DATE <- as.Date("1993-10-01")
END_DATE <- as.Date("1995-07-15")
STUDY_DURATION <- as.numeric(difftime(END_DATE, START_DATE, unit="days"))
TIME_STEP <- 10
N_OBS <- STUDY_DURATION%/%10
SHIFT_WINDOW <- 20 # estimate Pv FOI separately before and after this window
YEAR_IN_TIMESTEP <- 365%/%TIME_STEP
THRESHOLD_AGE <- 2*YEAR_IN_TIMESTEP # possible stratification in FORI above/below this age

# fixed covariates
PREL_BASELINE <- 0.4 # based on in vivo experiments for Chesson strain
N_AGES <- patient_metadata[names(inf_states_by_VIN), "AGE"]*YEAR_IN_TIMESTEP # in units of TIME_STEP
names(N_AGES) <- names(inf_states_by_VIN)

# number of chains and iterations for MCMC (nested parallelisation over chains)
N_CHAINS <- 4
N_ITER <- 100000
BURNIN_PROP <- 0.2
N_CORES_PER_CHAIN <- 8

# parameters for Metropolis-Hastings proposal
LAMBDA_PROP_SD <- 0.02/365
NU_PROP_SD <- 0.2
ETA_PROP_SD <- 1/2000
LOGIT_RHO_PROP_SD <- 0.05
LOG_GAMMA_PROP_SD <- 0.05

# hyperparameters for informative priors
LOG_GAMMA_PRIOR_SD <- 0.6
LOGIT_RHO_PRIOR_SD <- 0.7

# parameters for posterior predictive data
N_POSTERIOR_PREDICTIVE_DATASETS <- 2000
PROP_MASKED <- 0.5
PROPHYLAXIS_WINDOW <- 1
BUNCHING_WINDOW <- 2

# observed data for posterior predictive checking
masking_periods <- read_rds("Spf66_data_processed/masking_periods.rds")
observed_incidence_by_indiv <- read_rds("Spf66_data_processed/incidence_by_individual.rds") 
observed_incidence_by_window <- read_rds("Spf66_data_processed/incidence_by_window.rds")
malaria_consultations_reconciled <- read_rds("Spf66_data_processed/malaria_consultations_reconciled.rds")

# formatting
POSTERIOR_COL <- "#cf7940"
OBSERVED_COL <- "#4878b8"

Inference with Metropolis-Hastings algorithm

Baseline model: assumes geometrically-distributed sporozoite inocula, with the ratio of hypnozoites to immediately-developing forms informed by in vivo estimates for the Chesson strain and no age-stratification in the force of inoculation

if (RUN_ALL || !file.exists("Spf66_calibration/MCMC_results_main.rds")) {
  set.seed("28091907")
  
  # rescale LAMBDA2 to be the mean FOI in the second stage of the study
  MCMC_results <- Metropolis_Hastings(inf_states_by_VIN, N_AGES, PREL_BASELINE, N_OBS, TIME_STEP,
                                    SEASONALITY, SHIFT_WINDOW, THRESHOLD_AGE, 1,
                                    N_CHAIN, N_ITER, N_CORES_PER_CHAIN,
                                    LAMBDA_PROP_SD, NU_PROP_SD, ETA_PROP_SD,
                                    LOGIT_RHO_PROP_SD, LOG_GAMMA_PROP_SD,
                                    LOG_GAMMA_PRIOR_SD, LOGIT_RHO_PRIOR_SD) %>%
      mutate(LAMBDA2=LAMBDA2*mean(tail(SEASONALITY, N_OBS)[(SHIFT_WINDOW+1):N_OBS]))
  
  write_rds(MCMC_results, "Spf66_calibration/MCMC_results_main.rds")

} else {
  
  MCMC_results <- read_rds("Spf66_calibration/MCMC_results_main.rds")
    
}

MCMC_posterior <- MCMC_results %>% subset(ITER>BURNIN_PROP*N_ITER) %>% mutate(CHAIN=as.character(CHAIN))

Convergence diagnostics

Pairwise posterior plots

Summary of posterior estimates

Below, we report posterior median estimates and 95% confidence intervals for various quantites of epidemiological interest.

variable median LCI UCI
Average duration of hypnozoite carriage (days) 170.7949150 143.5511455 205.9326978
Half-life of a hypnozoite batch (days) 118.3860138 99.5020718 142.7416689
Prob of activation in 2 week window 0.0787001 0.0657240 0.0929214
Prob of activation in 4 week window 0.1512065 0.1271283 0.1772085
Prob of activation in 12 week window 0.3884863 0.3349548 0.4429818
Prob of activation in 24 week window 0.6260510 0.5577149 0.6897307
Average sporozoite batch size 6.8569487 5.4376382 8.7320378
Average hypnozoite batch size 2.7427795 2.1750553 3.4928151
Prob of primary infection per bite 0.8044648 0.7654005 0.8397236
No primary but at least one relapse per bite 0.0682593 0.0575230 0.0792631
Prob at least 2 recurrences per bite 0.7887810 0.7488923 0.8253521
Relapse:primary ratio per bite 3.4094461 2.8417219 4.1594818
Prob relapse within 2 weeks of bite 0.1768263 0.1464061 0.2160710
Average force of inoculation in first stage of study (yearly) 0.5717149 0.4703640 0.6901984
Average force of inoculation in second stage of study (yearly) 0.4956555 0.4001830 0.6047811
Average force of prim inf in first stage of study (yearly) 0.4593294 0.3847773 0.5419750
Average force of prim inf in second stage of study (yearly) 0.3979545 0.3266305 0.4809973

ETA_POSTERIOR=median(MCMC_posterior$ETA)
NU_POSTERIOR=median(MCMC_posterior$NU)

FORI_POSTERIOR <- 
  time_dependent_fori(median(MCMC_posterior$LAMBDA1), 
                      median(MCMC_posterior$LAMBDA2)/
                        mean(tail(SEASONALITY, N_OBS)[(SHIFT_WINDOW+1):N_OBS]),
                      SHIFT_WINDOW, SEASONALITY)

AGES <- sort(unique(patient_metadata$AGE))
PCLIN_POSTERIOR <- sapply(AGES, function(a) {
  median(calculate_pclin(a, MCMC_posterior$LOGIT_RHO, MCMC_posterior$LOG_GAMMA, 
                           min(AGES), max(AGES)))})
names(PCLIN_POSTERIOR) <- AGES

Posterior predictive checking

We generate posterior predictive data under the model by sampling parameter combinations uniformly at random (without replacement) from the joint posterior. For comparability, we retain the age structure of the SPf66 cohort in addition to patterns of left/right-censoring and documented absences from the camp. The effects of post-exposure prophylaxis due to the treatment of symptomatic falciparum malaria is not accounted for.

MASKING_MATRIX <- masking_periods[["exc_prophylaxis"]] %>% 
  transmute(VIN=factor(VIN, levels=keep_VIN),
            window_start=START%/%TIME_STEP+1, window_end=END%/%TIME_STEP+1, 
            prop_window_start=1-(START%%TIME_STEP)/TIME_STEP, 
            prop_window_end=(END%%TIME_STEP)/TIME_STEP) %>%
  split(f=.$VIN) %>%
  sapply(function(masking_windows) {
    masking_windows <- masking_windows %>% dplyr::select(-VIN)
    
    inf_state <- rep(1, N_OBS)
    if (nrow(masking_windows)<1) return(inf_state)
    
    # fully masked windows
    fully_masked <- masking_windows %>% subset(window_end-window_start>1) %>% 
      apply(1, function(x) (x[1]+1):(x[2]-1)) 
    if (is.list(fully_masked)) fully_masked <- do.call(c, fully_masked)
    inf_state[fully_masked] <- NA
    
    # calculate proportion of each window that is masked due to prophylaxis or absence
    proportion_masked <- rep(0, N_OBS)
    proportion_masked[masking_windows$window_start] <- 
      proportion_masked[masking_windows$window_start] + masking_windows$prop_window_start
    proportion_masked[masking_windows$window_end] <- 
      proportion_masked[masking_windows$window_end] + masking_windows$prop_window_end
    inf_state[which(proportion_masked>PROP_MASKED)] <- NA
    return(inf_state)}) %>% t

write_rds(MASKING_MATRIX, "Spf66_data_processed/discretised_masking_matrix.rds")

Each simulated symptomatic episode is modeled to be treated with a long-lived antimalarial that gives rise to prophylactic masking for 1, and a potential bunching effect for the subsequent 2 windows (where each window spans 10 days).

set.seed("15051907")
  
posterior_draws <- MCMC_posterior[sample(1:nrow(MCMC_posterior), N_POSTERIOR_PREDICTIVE_DATASETS),]

MIN_AGE <- min(N_AGES)
MAX_AGE <- max(N_AGES)

posterior_predictive_cohorts <- lapply(1:N_POSTERIOR_PREDICTIVE_DATASETS, function(i) {
  simulate_dataset(time_dependent_fori(posterior_draws[i, "LAMBDA1"], 
                                        posterior_draws[i, "LAMBDA2"]/mean(tail(SEASONALITY, N_OBS)[(SHIFT_WINDOW+1):N_OBS]), 
                                        SHIFT_WINDOW, SEASONALITY),
                    TIME_STEP, N_AGES, N_OBS, posterior_draws[i, "NU"], 1, 
                    posterior_draws[i, "ETA"], PREL_BASELINE, PROPHYLAXIS_WINDOW, BUNCHING_WINDOW,
                    calculate_pclin(N_AGES, posterior_draws[i, "LOGIT_RHO"], 
                                   posterior_draws[i, "LOG_GAMMA"], MIN_AGE, MAX_AGE),
                    MASKING_MATRIX, 1, MIN_AGE)})

To evaluate model fit, we perform posterior predictive checks for age structure in the incidence of symptomatic vivax malaria, as well as time variation in incidence of symptomatic vivax malaria over the course of the study.

Age structure in symptomatic malaria episodes

We calculate the incidence rate, stratified by age at enrolment, as the quotient of the total number of symptomatic malaria episodes recorded for an age group and the cumulative duration of clinical follow-up for the age group.

incidence_rate <- list()

incidence_rate[["Posterior predictive"]] <- sapply(posterior_predictive_cohorts, function(x) {
  split(x, patient_metadata[rownames(x), "AGE"]) %>% 
    sapply(function(y) sum(y==1, na.rm = TRUE)/sum(!is.na(y))*365%/%TIME_STEP)}) %>% 
  apply(1, function(x) quantile(x, c(0.025, 0.5, 0.975))) %>% 
  t %>% as.data.frame %>% mutate(AGE=as.numeric(rownames(.)))


N_BOOTSTRAP_REPLICATES <- 2000
observed_incidence_bootstrap <- lapply(1:N_BOOTSTRAP_REPLICATES, function(i) {
  observed_incidence_by_indiv[sample(1:N_COHORT, N_COHORT, replace=T),]})

incidence_rate[["Observed"]] <- lapply(observed_incidence_bootstrap, function(x) {
  x %>% group_by(AGE) %>% summarise(mean_incidence=sum(N_Pv)/sum(Followup_Pv))}) %>% 
  bind_rows(.id="iter") %>%
  group_by(AGE) %>% 
  summarise(`2.5%`=quantile(mean_incidence, 0.025),
            `50%`=quantile(mean_incidence, 0.5),
            `97.5%`=quantile(mean_incidence, 0.975))

Additionally, we consider empirical cumulative distribution functions for the incidence by individual, stratified by age group.

age_counts <- patient_metadata %>% group_by(AGE) %>% summarise(count=n()) %>%
  mutate(count=factor(paste0(AGE, " years old\n(n=", count, ")"),
                      levels=paste0(2:15, " years old\n(n=", count, ")")))

indiv_incidence_simulated <- lapply(posterior_predictive_cohorts, function(x) {
  data.frame(incidence=rowSums(x==1, na.rm=TRUE)/rowSums(!is.na(x))*(365%/%TIME_STEP), 
             AGE=patient_metadata[rownames(x), "AGE"]) %>%
    arrange(AGE, incidence) %>% mutate(index=1:nrow(.))}) %>%
  bind_rows(.id="iter") %>% group_by(AGE, index) %>% 
  summarise(median=median(incidence), LCI=quantile(incidence, 0.025), 
            UCI=quantile(incidence, 0.975)) %>% split(f=.$AGE) %>% 
  lapply(function(x) x %>% mutate(index=seq(1/nrow(.), 1, 1/nrow(.)))) %>% 
  bind_rows %>% merge(age_counts)

Incidence in windows

We compare the incidence by window for posterior predictive data against the observed incidence of symptomatic vivax malaria (calculated in 20 day windows, with a moving average across half-windows).

posterior_predictive_incidence_by_window <- lapply(posterior_predictive_cohorts, function(y) {
  per_window <- colSums(y==1, na.rm = TRUE)/colSums(!is.na(y))
  return(sapply(1:(length(per_window)-1), 
                function(i) mean(c(per_window[i], per_window[i+1]))))}) %>%
  do.call(cbind, .) %>% 
  apply(1, function(x) quantile(x*YEAR_IN_TIMESTEP, c(0.025, 0.5, 0.975))) %>% 
  t %>% as.data.frame %>% 
  mutate(Date=days((1:(N_OBS-1))*TIME_STEP)+START_DATE)

Vivax malaria following falciparum monoinfection

For 2000 parameter combinations sampled uniformly at random from the posterior, we derive the likelihood of observing a clinical vivax recurrence in a specified window due to either spontaneous hypnozoite activation or reinfection event. In doing so, we condition on the history of vivax recurrence prior to the baseline falciparum episode for each child; in addition to potential masking in the window due to lapses in clinical follow-up (left- or right-censoring, or a documented absence from the camp) or post-exposure prophylaxis following the treatment of falciparum malaria. Given the resultant likelihood vector, we model the number of falciparum monoinfections followed by vivax within the specified windows to follow a Poisson binomial distribution (i.e. as a sum of independent, but non-identically distributed Bernoulli random variables).

POST_WINDOWS <- 2:8

Pv_treatment_masking <- malaria_consultations_reconciled %>%
  subset(VIVAX==1 & VIVAX_MASKING>0) %>%
  transmute(VIN=factor(VIN, levels=keep_VIN),
            START=RelativeTime%/%TIME_STEP+2,
            END=(TIME_STEP*(RelativeTime%/%TIME_STEP)+TIME_STEP/2+VIVAX_MASKING)%/%TIME_STEP)

FORI_posterior_subset <- mapply(time_dependent_fori, 
                                posterior_draws$LAMBDA1,
                                posterior_draws$LAMBDA2/mean(tail(SEASONALITY, N_OBS)[(SHIFT_WINDOW+1):N_OBS]),
                                SHIFT_WINDOW, list(SEASONALITY), SIMPLIFY = F)

vivax_recur_prob_indiv <- function(my_VIN, WINDOW, POST_WINDOW) {
  inf_states <- inf_states_by_VIN[[my_VIN]]
  
  target_window <- WINDOW+(1:POST_WINDOW)
  target_window <- subset(target_window, target_window<=N_OBS)
  
  # mask any infections after the falciparum episode window
  if (max(target_window)<N_OBS) {
    inf_states[(max(target_window)+1):N_OBS] <- "M"
  }
  
  # unmask prophylaxis due to Pv in the target window
  Pv_after_Pf_masking <- Pv_treatment_masking %>% subset(VIN==my_VIN & START>WINDOW+1)
  Pv_after_Pf_masking <- 
    intersect(do.call(c, mapply(function(i, j) {i:j}, 
                                Pv_after_Pf_masking$START, Pv_after_Pf_masking$END, 
                                SIMPLIFY = FALSE)), target_window)
  if (length(Pv_after_Pf_masking)>0) inf_states[Pv_after_Pf_masking] <- "N"
  
  # generate S vectors before and after baseline Pf
  no_Pv_after_Pf <- masked_after_Pf <- inf_states
  no_Pv_after_Pf[target_window] <- gsub("C|B", "N",  no_Pv_after_Pf[target_window])
  masked_after_Pf[target_window] <- "M"
  
  if (all(inf_states[target_window]=="M")) return(rep(NA, N_POSTERIOR_PREDICTIVE_DATASETS))
  
  Pv_after_Pf_observed <- any(inf_states[target_window]=="C")

  S_vectors_no_Pv_after_Pf <- generate_S_vectors(list(no_Pv_after_Pf))
  S_vectors_masked_after_Pf <- generate_S_vectors(list(masked_after_Pf))
  
  likelihood_no_Pv_after_Pf <- mcmapply(inc_exc_likelihood,
                                        S_vectors_no_Pv_after_Pf[["S_plus"]][1], 
                                        S_vectors_no_Pv_after_Pf[["S_minus"]][1], 
                                        patient_metadata[my_VIN, "AGE"]*YEAR_IN_TIMESTEP,
                                        calculate_pclin(patient_metadata[my_VIN, "AGE"],
                                                        posterior_draws$LOGIT_RHO, 
                                                        posterior_draws$LOG_GAMMA, 
                                                        min(AGES), max(AGES)),
                                        FORI_posterior_subset,
                                        posterior_draws$ETA, posterior_draws$NU,
                                        PREL_BASELINE, N_OBS, TIME_STEP, 1, 1)
  
  likelihood_masked_after_Pf <- mcmapply(inc_exc_likelihood,
                                         S_vectors_masked_after_Pf[["S_plus"]][1],
                                         S_vectors_masked_after_Pf[["S_minus"]][1], 
                                         patient_metadata[my_VIN, "AGE"]*YEAR_IN_TIMESTEP,
                                         calculate_pclin(patient_metadata[my_VIN, "AGE"],
                                                         posterior_draws$LOGIT_RHO,
                                                         posterior_draws$LOG_GAMMA, 
                                                         min(AGES), max(AGES)),
                                         FORI_posterior_subset,
                                         posterior_draws$ETA, posterior_draws$NU,
                                         PREL_BASELINE, N_OBS, TIME_STEP, 1, 1)
  
  return(1-likelihood_no_Pv_after_Pf/likelihood_masked_after_Pf)
}

Confounding due to seasonality

To gauge confounding due to seasonality, we consider the observed vs model-predicted rate of vivax recurrence in fixed windows following various time points in the SPf66 trial.

vivax_recur_cohort <- function(WINDOW, POST_WINDOW) {
  #print(paste0(WINDOW, ", ", POST_WINDOW))
  prob_per_window <- sapply(as.character(patient_metadata$VIN), 
                            vivax_recur_prob_indiv, WINDOW, POST_WINDOW)
  prob_per_window <- prob_per_window[,colMeans(apply(prob_per_window, 2, is.na))<1]
  
  if (length(prob_per_window)==0 | is.null(dim(prob_per_window))) return(NULL) 

  N_partial_followup <- ncol(prob_per_window)
  N_Pv <- length(unique((malaria_consultations_reconciled %>% 
                           subset(VIVAX==1 & RelativeTime>TIME_STEP*WINDOW &
                                    RelativeTime<TIME_STEP*(WINDOW+POST_WINDOW)))$VIN))
  
  Pv_prop_vals <- seq(0, 1, 1/N_partial_followup)
      
  Pv_prop_cdf <- apply(prob_per_window, 1, function(pp) {
      pp <- subset(pp, pp>0 & pp<1)
      return(poisbinom::ppoisbinom(length(pp)*Pv_prop_vals, pp))})

  Pv_summary <- data.frame(Prop_Pv=Pv_prop_vals, CDF=rowMeans(Pv_prop_cdf),
             WINDOW=WINDOW, POST_WINDOW=POST_WINDOW, observed=FALSE)
  Pv_summary[N_Pv+1, "observed"] <- TRUE
  
  return(Pv_summary)
}

if (RUN_ALL || !file.exists("Spf66_calibration/vivax_recur_cohort_pred.rds")) {
  
  vivax_recur_cohort_pred <- mapply(vivax_recur_cohort, 
                                  rep(seq(0, 55, 5), each=length(POST_WINDOWS)),
                                  rep(POST_WINDOWS, 12), SIMPLIFY = FALSE)
  
  write_rds(vivax_recur_cohort_pred, "Spf66_calibration/vivax_recur_cohort_pred.rds")
  
} else {
  
  vivax_recur_cohort_pred <- read_rds("Spf66_calibration/vivax_recur_cohort_pred.rds")
  
}


vivax_recur_CrI <- 
  data.frame(WINDOW=sapply(vivax_recur_cohort_pred, function(x) x[1, "WINDOW"]),
             POST_WINDOW=sapply(vivax_recur_cohort_pred, function(x) x[1, "POST_WINDOW"])*TIME_STEP,
             LCI=sapply(vivax_recur_cohort_pred, function(x) min(subset(x$Prop_Pv, x$CDF>=0.005))),
             UCI=sapply(vivax_recur_cohort_pred, function(x) min(subset(x$Prop_Pv, x$CDF>=0.995))),
             observed=sapply(vivax_recur_cohort_pred, function(x) subset(x$Prop_Pv, x$observed)[1]),
             pvalue=sapply(vivax_recur_cohort_pred, function(x) subset(x$CDF, x$observed)[1])) %>%
  mutate(BASELINE=paste0(START_DATE+days(TIME_STEP*WINDOW), " (day ", TIME_STEP*WINDOW, ")"))

Vivax after falciparum

Various studies have demonstrated an elevated risk of vivax malaria following febrile falciparum episodes. To mimic the structure of a typical study, we focus on the burden of vivax recurrence in fixed follow-up windows following each symptomatic falciparum monoinfection. We stratify baseline episodes based on treatment with artesunate monotherapy vs artesunate-mefloquine combination therapy, with nearest neighbour matching for the time of the baseline episode and the number of previously recorded vivax episodes.

falcip_monoinf <- malaria_consultations_reconciled %>%
  mutate(WINDOW=RelativeTime%/%TIME_STEP+1) %>%
  subset(VIVAX==0 & FALCIP==1 & Treatment %in% c("AS", "AS+MF") & WINDOW<N_OBS-max(POST_WINDOWS)-1 & 
           !near_treatment_failure) %>% 
  transmute(VIN=as.character(VIN), WINDOW=RelativeTime%/%TIME_STEP+1,
            Treatment=Treatment, PfTime=RelativeTime, 
            Group=as.numeric(Treatment=="AS"))

falcip_monoinf$Prev_Pv_ep <- apply(falcip_monoinf, 1, function(x) 
  malaria_consultations_reconciled %>% 
    subset(as.character(VIN)==as.character(x[["VIN"]]) & VIVAX==1 & 
             RelativeTime<as.numeric(x[["PfTime"]])) %>% nrow)

falcip_monoinf$Next_Pv_ep <- apply(falcip_monoinf, 1, function(x) {
  Pv_after_baseline <- malaria_consultations_reconciled %>%
    subset(as.character(VIN)==as.character(x[["VIN"]]) & VIVAX==1 & 
             RelativeTime>as.numeric(x[["PfTime"]]))
  if (nrow(Pv_after_baseline)==0) return(Inf)
  return(min(Pv_after_baseline$RelativeTime)%/%TIME_STEP+1)})
  
match_falcip_monoinf_covariates <- 
  matchit(Group ~ Prev_Pv_ep + PfTime, data=falcip_monoinf,
          distance="glm", method="nearest")

falcip_monoinf <- 
  match.data(match_falcip_monoinf_covariates, drop.unmatched = FALSE)

falcip_monoinf_groups <- list()

falcip_monoinf_groups[["AS"]] <- which(falcip_monoinf$Treatment=="AS")
falcip_monoinf_groups[["AS+MF"]] <- which(falcip_monoinf$Treatment=="AS+MF")
falcip_monoinf_groups[["AS+MF matched"]] <- 
  which(falcip_monoinf$Treatment=="AS+MF" & falcip_monoinf$weights==1)

names(falcip_monoinf_groups) <- paste0(names(falcip_monoinf_groups), " (n=",
                                       sapply(falcip_monoinf_groups, length), ")")
vivax_recur_prob_windows <- lapply(POST_WINDOWS, function(W) {
  mapply(vivax_recur_prob_indiv, falcip_monoinf$VIN, falcip_monoinf$WINDOW, W)})
#write_rds(vivax_recur_prob_windows, "vivax_recur_prob_windows.rds")

prop_pv_after_pf <- function(Pf_monoinf_subset) {
  lapply(1:length(POST_WINDOWS), function(i) {
    
    POST_WINDOW <- POST_WINDOWS[i]
    
    # restrict attention to the subset of baseline episode with at least partial
    # clinical follow-up in the window of interest
    x <- vivax_recur_prob_windows[[i]][,Pf_monoinf_subset]
    x <- x[,colMeans(apply(x, 2, is.na))<1]

    if (length(x)==0) {
      return(data.frame(Prop_Pv=NA, CDF=NA))
    }
    
    if (is.null(dim(x))) {
      return(data.frame(Prop_Pv=NA, CDF=NA))
    }

    N_Pv <- sum(falcip_monoinf[Pf_monoinf_subset,]$Next_Pv_ep-
                       falcip_monoinf[Pf_monoinf_subset,]$WINDOW <= POST_WINDOW)
    
    Pv_prop_vals <- seq(0, 1, 1/ncol(x))
      
    Pv_prop_cdf <- apply(x, 1, function(pp) {
      pp <- subset(pp, pp>0 & pp<1)
      return(poisbinom::ppoisbinom(length(pp)*Pv_prop_vals, pp))})
    
    Pv_prop_summary <- data.frame(Prop_Pv=Pv_prop_vals, CDF=rowMeans(Pv_prop_cdf),
                                  POST_WINDOW=POST_WINDOW, observed=FALSE)
    Pv_prop_summary[N_Pv+1, "observed"] <- TRUE
    
    return(Pv_prop_summary)}) %>% bind_rows()
}

prop_pv_after_pf_pred <- lapply(falcip_monoinf_groups, prop_pv_after_pf)

prop_pv_after_pf_CrI <- lapply(prop_pv_after_pf_pred, function(x) {
  x %>% split(f=.$POST_WINDOW) %>% 
    lapply(function(x) c(POST_WINDOW=x[1, "POST_WINDOW"], 
                         LCI=min(subset(x$Prop_Pv, x$CDF>=0.005)), 
                         UCI=min(subset(x$Prop_Pv, x$CDF>=0.995)), 
                         observed=subset(x$Prop_Pv, x$observed)[1], 
                         pvalue=subset(x$CDF, x$observed)[1])) %>% bind_rows()
  }) %>% bind_rows(.id="Group")
Group POST_WINDOW LCI UCI observed pvalue
AS (n=120) 2 0.0180180 0.1351351 0.1081081 2.422e-02
AS (n=120) 3 0.0438596 0.1929825 0.2807018 1.044e-07
AS (n=120) 4 0.0683761 0.2393162 0.3589744 1.162e-09
AS (n=120) 5 0.1016949 0.2711864 0.4322034 1.098e-11
AS (n=120) 6 0.1271186 0.3050847 0.4830508 1.630e-12
AS (n=120) 7 0.1440678 0.3389831 0.5000000 2.050e-11
AS (n=120) 8 0.1694915 0.3644068 0.5254237 2.520e-11
AS+MF (n=607) 4 0.0276243 0.0755064 0.0276243 9.943e-01
AS+MF (n=607) 5 0.0588235 0.1193772 0.0813149 6.617e-01
AS+MF (n=607) 6 0.0870307 0.1569966 0.1467577 2.415e-02
AS+MF (n=607) 7 0.1118644 0.1881356 0.2067797 8.982e-05
AS+MF (n=607) 8 0.1336717 0.2148900 0.2487310 1.545e-06
AS+MF matched (n=120) 4 0.0098039 0.1176471 0.0196078 9.224e-01
AS+MF matched (n=120) 5 0.0275229 0.1743119 0.1009174 3.289e-01
AS+MF matched (n=120) 6 0.0535714 0.2142857 0.1875000 2.879e-02
AS+MF matched (n=120) 7 0.0789474 0.2543860 0.2631579 1.407e-03
AS+MF matched (n=120) 8 0.1052632 0.2807018 0.2982456 1.113e-03

Metrics of epidemiological interest

We now derive metrics of epidemiological interest, predicated on our posterior median estimates.

Overdispersion and zero-inflation of the hypnozoite reservoir

Here, we consider the size of the hypnozoite reservoir, and the time to spontaneous clearance of the hypnozoite reservoir (with a threshold probability) assuming mosquito-to-human transmission is temporarily interrupted (e.g. due to widespread administration of ivermectin).

Assumption: hypnozoite reservoir has reached stationarity under a constant force of inoculation.

# Size of the hypnozoite reservoir at stationarity
HMAX <- 11
TIMES <- seq(0, 730, 0.05)
hyp_tail_dist <- sapply(COARSE_LAMBDA_VALS, function(x) {
  hyp_reservoir_stationary_cdf(HMAX, ETA_POSTERIOR, NU_POSTERIOR, PREL_BASELINE, x)}) %>% 
  as.data.frame %>% mutate(n_hyp=0:HMAX) %>% reshape2::melt(id="n_hyp") %>% 
  transmute(n_hyp=n_hyp+1, tail_prob=1-value,
            lambda=COARSE_LAMBDA_VALS[as.numeric(gsub("V", "", variable))]*365) 

# Time to spontaneous clearance of the hypnozoite reservoir
# (assuming mosquito-to-human transmission temporarily interrupted)
THRESHOLD_VALS <- c(0.8, 0.85, 0.9, 0.95, 0.96, 0.97, 0.98, 0.99)
clearance_time <- lapply(THRESHOLD_VALS, function(THRESHOLD) 
  data.frame(LAMBDA=FINE_LAMBDA_VALS*365, THRESHOLD=THRESHOLD, 
             Clearance_Prob_Time=sapply(FINE_LAMBDA_VALS, function(x) 
               time_to_clear_hyp(THRESHOLD, ETA_POSTERIOR, NU_POSTERIOR, PREL_BASELINE, x)))) %>% 
  bind_rows()

At a force of inoculation of 0.5 bites per year, only 27% of people are predicted to carry hypnozoites; however, to ensure spontaneous hypnozoite clearance with probability 0.98, mosquito-to-human transmission would need to be interrupted for 1.6 years. `

Recent recurrence as a predictor of hypnozoite carriage

Here, we consider the predictive value of recent bloodstream infection as a predictor of hypnozoite carriage.

Assumption: hypnozoite reservoir has reached stationarity under a constant force of inoculation.

SEROTAT_WINDOW <- 270

sensitivity_specificity_recent_recur <- sapply(FINE_LAMBDA_VALS, function(LAMBDA) 
  recent_recur_accuracy_stationary(SEROTAT_WINDOW, ETA_POSTERIOR, NU_POSTERIOR, PREL_BASELINE, LAMBDA)) %>% 
  t %>% as.data.frame %>% 
  transmute(LAMBDA=FINE_LAMBDA_VALS*365, Hyp_Carriage=Hyp_Carriage,
            Recent_Recur=Sensitivity*Hyp_Carriage/Recent_Recurrence,
            No_Recent_Recur=1-Specificity*(1-Hyp_Carriage)/(1-Recent_Recurrence)) %>%
  reshape2::melt(id="LAMBDA")

At a force of inoculation of 0.5 infectious bites per year, individuals who have had recent recurrences are 2 as likely to harbour hypnozoites than randomly-sampled individuals; for 2 bites year, they are only 1.1 times more likely to harbour hypnozoites. The false omission rate (conditional probability of hypnozoite carriage given no recent recurrences) is generally low but rises to 18% under a force of inoculation of 2 infectious bites per year.

Theoretical comparison of MSAT vs MDA

Here, we perform a theoretical comparison of MSAT vs MDA, whilst accommodating population heterogeneity in the form of a Gamma-distributed force of inoculation (Smith et al, 2005).

Assumptions: for each individual, the hypnozoite reservoir has reached stationarity under a force of inoculation sampled from a uniform distribution; the outcome of the serological test given recent recurrence is conditionally independent to hypnozoite carriage.

Note: the expression for the proportion of hypnozoite carriers who are correctly-targeted is formulated in terms of the Hurwitz-Zeta function; we have computed this expression using Mathematica, since there is no available implementation in R.

SEROTAT_WINDOW <- 270
SEROTAT_SPEC <- 0.8
SEROTAT_SENS <- 0.8
THETA_VALS <- 10^seq(-7, 0, 0.01)

serotat_specificity_het <- 
  lapply(COARSE_LAMBDA_VALS, function(LAMBDA_MEAN) {
    data.frame(THETA=THETA_VALS, LAMBDA=LAMBDA_MEAN*365, 
               Specificity=sapply(THETA_VALS, function(THETA) {
                 recent_recur_specificity_stationary_het(SEROTAT_WINDOW, ETA_POSTERIOR, NU_POSTERIOR,
                                                         PREL_BASELINE, LAMBDA_MEAN, THETA)}))}) %>% 
  bind_rows() %>% mutate(PROP_TOP_20=prop_bites_top_20(THETA, LAMBDA/365),
                         GINI_COEFF=acid::gini.gamma(THETA))

# Hurwitz-zeta function (with shift!=1) not implemented in R, sensitivity
# in the presence of heterogeneity computed in Mathematica; numerical overflow
# and loss of precision if theta too small
param_vals <- expand.grid(SEROTAT_WINDOW=SEROTAT_WINDOW, ETA=ETA_POSTERIOR, NU=NU_POSTERIOR,
                          PREL=PREL_BASELINE, LAMBDA=COARSE_LAMBDA_VALS, THETA=THETA_VALS) %>%
  subset(THETA>=10^-4)

write.csv(param_vals, file="Metrics_of_interest_code/recent_recur_sensitivity_stationary_het_params.csv", row.names = FALSE)

recent_recur_sensitivity_het <- 
  read.delim("Metrics_of_interest_code/recent_recur_sensitivity_stationary_het_Mathematica.dat", header=FALSE)
  
serotat_sensitivity_het <- 
  bind_cols(param_vals[, c("LAMBDA", "THETA")], recent_recur_sensitivity_het) %>%
  transmute(PROP_TOP_20=prop_bites_top_20(THETA, LAMBDA), LAMBDA=LAMBDA*365, Sensitivity=V1)

serotat_sensitivity <- 
  data.frame(LAMBDA=COARSE_LAMBDA_VALS*365, PROP_TOP_20=0.2,
             Sensitivity=sapply(COARSE_LAMBDA_VALS, function(x) 
               recent_recur_accuracy_stationary(SEROTAT_WINDOW, ETA_POSTERIOR, NU_POSTERIOR, PREL_BASELINE, x)[["Sensitivity"]]))

serotat_sensitivity_het <- bind_rows(serotat_sensitivity_het, serotat_sensitivity)

In a population where individuals are bitten once a year on average, but 20% of individuals receive 80% of all infective bites (Gini coefficient 0.985), we predict serological MSAT to yield a 2.8 fold reduction in overtreatment relative to MDA (in the eligible population).

Consecutive recurrences derived from the same sporozoite batch

Here, we assume that an individual has a baseline recurrence on day 1 and consider both the time to the secondary recurrence, and the probability that the baseline and secondary recurrences are derived from different sporozoite batches.

Assumption: hypnozoite reservoir has reached stationarity under a constant force of inoculation.

# discretised in time steps of days
N_VALS <- 2:250
baseline_to_secondary_recurrence <- 
  lapply(COARSE_LAMBDA_VALS, function(LAMBDA) {
     data.frame(LAMBDA=LAMBDA*365, time=c(0, N_VALS-1),
               cdf_next_recur=cumsum(c(0, conditional_prob_next_recurrence(N_VALS, 1, LAMBDA, 
                                            NU_POSTERIOR, PREL_BASELINE, ETA_POSTERIOR))),
               prob_same_batch=c(NA, conditional_prob_same_batch(N_VALS, 1, LAMBDA, 
                                  NU_POSTERIOR, PREL_BASELINE, ETA_POSTERIOR)))}) %>% bind_rows

Under a force of inoculation of 0.5 bites per year, we predict 38% of baseline recurrences to be followed by a second recurrence within 28 days of follow-up, with this figure rising to 55% under a more intense force of inoculation of 2 bites per year.

Given a secondary recurrence occurs precisely 28 days after a baseline recurrence, we preiduct that it is derived from a different sporozoite batch with probability 0.22 under a force of inoculation of 0.5 bites per year, but a substantially higher probability of 0.55 under a force of inoculation of 2 bites per year.

Background recurrence rate

Granular analysis of the risk of vivax malaria following symptomatic falciparum malaria is confounded by seasonality. As a baseline compartor, we derive the force of inoculation required to yield spontaneous recurrence with a given probability in any defined window of follow-up, accounting for antidisease masking (but without adjusting for post-exposure prophylaxis).

Ignoring the effects of seasonality and post-exposure prophylaxis, we can derive the force of inoculation required to yield recurrence with a given probability over a period of clinical follow-up.

Assumption: hypnozoite reservoir has reached stationarity under a constant force of inoculation.

background_recurrence <- 
  expand.grid(W=c(30, 60, 90), p_recur=seq(0, 0.5, 0.01), PCLIN=seq(0.5, 1, 0.1)) %>%
  mutate(FORI=fori_required_for_recur(W, p_recur, NU_POSTERIOR, PREL_BASELINE, ETA_POSTERIOR, PCLIN)*365,
         W=paste0(W, " days of follow-up"))

To ensure spontaneous recurrence with probability 0.14 over 60 days of follow-up, an average of 0.3652257 bites per year would be required; adjusting for antidisease masking, with each hypnozoite activation and immediate sporozoite development event giving rise to clinical symptoms with probability 0.75, this this figure rises to an average of 0.4533424 bites per year. To explain the corresponding proportion of 0.42 over 60 days of follow-up after rapidly-eliminated antimalarial treatment, we would instead require 1.319086 bites per year (ignoring antidisease masking) or 1.6373373 bites per year in the setting where each hypnozoite activation and immediate sporozoite development event giving rise to clinical symptoms with probability 0.75.

Relapse burden per bite

Here, we consider the inter-relapse times following a single infective bite (with a geometrically-distributed hypnozoite inoculum), with the assumption that a hypnozoite activation event gives rise to a detectable relapse if and only if it occurs at least T_MASK days after the previous detectable relapse.

T_MASK <- 10
relapses_per_bite <- detectable_relapses_per_bite(NU_POSTERIOR, PREL_BASELINE, ETA_POSTERIOR, T_MASK)
inter_relapse_cdf_per_bite <- inter_relapse_cdfs(NU_POSTERIOR, PREL_BASELINE, ETA_POSTERIOR, T_MASK, seq(0, 300, 2))
n_relapses activated detected
1 0.7328189 0.7328189
2 0.5370235 0.5285090
3 0.3935410 0.3748590
4 0.2883943 0.2612981
5 0.2113408 0.1788722
6 0.1548745 0.1201619
7 0.1134950 0.0791551
8 0.0831712 0.0510911
9 0.0609495 0.0322868
10 0.0446649 0.0199605
11 0.0327313 0.0120624
12 0.0239861 0.0071196
13 0.0175775 0.0041009
14 0.0128811 0.0023032
15 0.0094395 0.0012604
16 0.0069175 0.0006712

Ignoring any masking of hypnozoite activation events in quick succession, we also calculate the coefficient of variation (CV) for the mth inter-relapse times following a single infective bite. We can show that the CV is independent of m.

CV <- sapply(MCMC_posterior$NU, 
             function(NU) sqrt(2*NU*PREL_BASELINE*HMMcopula::dilog(1/(1+NU*PREL_BASELINE)) -
                                 log(1+NU*PREL_BASELINE)^2)/log(1+NU*PREL_BASELINE))

We estimate this CV to be 1.41 (95% CrI 1.35 to 1.48).

Probabilistic classification of relapse vs primary infection

Here, we derive the probability that each recorded clinical recurrence is a relapse (defined to be the probability that it is caused by hypnozoite activation event(s) only). In deriving this probability, we account for the effects of prophylactic bunching, and condition on all observed inter-recurrence intervals, both before and after the target episode. Unlike other metrics, this classification additionally relies on posterior estimates for the force of inoculation (including seasonality) and the age-dependent probability of symptomatic infection.

PCLIN_VALS <- PCLIN_POSTERIOR[as.character(N_AGES/YEAR_IN_TIMESTEP)]
names(PCLIN_VALS) <- names(N_AGES)

relapse_classfication <- 
  classify_relapses(inf_states_by_VIN, N_AGES, PCLIN_VALS, FORI_POSTERIOR, 
                    ETA_POSTERIOR, NU_POSTERIOR, PREL_BASELINE)

We visualise probabilistic classifications for all 7 year olds with at least two recorded recurrences below. Crosses indicate recorded falciparum infections; light grey bars indicate masking (due to left- or right-censoring, a documented absence from the camp or post-exposure prophylaxis); and dark grey boxes indicate recurrences which could not be probabilistically classified due to numerical errors.